# -*- coding: utf-8 -*-
"""KNN.ipynb

Automatically generated by Colab.

Original file is located at
    https://colab.research.google.com/drive/1tN9y3AXogR3Skk5-h56G8qrLdkI02OfT
"""

import numpy as np
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler # Important for K-NN
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
import matplotlib.pyplot as plt
import seaborn as sns
# Fetch the bundled Iris data and pull out the pieces used by the rest of the script.
iris = load_iris()
X, y = iris.data, iris.target
feature_names, target_names = iris.feature_names, iris.target_names

# Tabular view of the features plus the class label column, built purely for
# inspection; X and y themselves are left untouched.
df = pd.DataFrame(X, columns=feature_names).assign(species=y)

print("Dataset Head:")
print(df.head())
print("\nTarget Names:", target_names)
print("Feature Names:", feature_names)
# Number of samples carrying each integer class label.
print("Class Distribution:", np.bincount(y))
# Hold out 30% of the samples for testing. Stratifying on y keeps the class
# balance comparable across the two partitions, and the fixed seed makes the
# split repeatable from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    stratify=y,
    random_state=42,
)

# Report the dimensions of every partition (blank line first, as a separator).
print()
for _label, _part in (
    ("X_train", X_train),
    ("X_test", X_test),
    ("y_train", y_train),
    ("y_test", y_test),
):
    print(f"{_label} shape: {_part.shape}")
# Learn the scaling statistics from the training split only, then apply that
# same fitted scaler to both splits so the test data never influences the fit.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

print("\nFeatures scaled successfully.")
print("X_train_scaled (first 5 rows):\n", X_train_scaled[:5])
# Build a K-NN classifier that votes among the 5 nearest neighbours, measuring
# distance with the Euclidean metric, and fit it on the standardized training
# features. fit() returns the estimator itself, so the two steps are chained.
model_knn = KNeighborsClassifier(metric='euclidean', n_neighbors=5).fit(
    X_train_scaled, y_train
)

print("\nK-NN model 'trained' (data stored).")

# Classify the held-out samples using the scaled test features.
y_pred = model_knn.predict(X_test_scaled)

# Quick side-by-side sanity check of predictions against ground truth.
print("\nPredicted class labels (first 5 samples):\n", y_pred[:5])
print("True class labels (first 5 samples):\n", y_test[:5])
# --- Evaluate the predictions against the held-out labels ---

# Overall accuracy on the test split, printed to two decimal places.
accuracy = accuracy_score(y_test, y_pred)
print(f"\nAccuracy: {accuracy:.2f}")

# Confusion matrix: rows are true classes, columns are predicted classes
# (matching the axis labels used in the heatmap below).
conf_matrix = confusion_matrix(y_test, y_pred)
print("\nConfusion Matrix:\n", conf_matrix)

# Per-class precision / recall / F1, labelled with the species names.
class_report = classification_report(y_test, y_pred, target_names=target_names)
print("\nClassification Report:\n", class_report)

# Visualize the confusion matrix as an annotated heatmap.
plt.figure(figsize=(8, 6))
sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='Blues',
            xticklabels=target_names,
            yticklabels=target_names)
plt.xlabel('Predicted Label')
plt.ylabel('True Label')
# Read K from the fitted model instead of hard-coding it, so the title cannot
# drift out of sync if n_neighbors is changed where the classifier is built.
plt.title(f'Confusion Matrix for K-NN Classifier (K={model_knn.n_neighbors})')
plt.show()